accidents <- read_csv("data/accidents.csv") |>
mutate(
time_hour = hour(hms(time)) + minute(hms(time)) / 60,
severity = factor(severity,
levels = c("Fatal",
"Serious",
"Slight")),
day_of_week = factor(day_of_week,
levels = rev(c("Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday")))
) |>
filter(!is.na(time_hour))
ggplot(
accidents,
aes(x = time_hour,
y = day_of_week,
fill = severity)
) +
geom_density_ridges(
alpha = 0.6,
scale = 1
) +
scale_x_continuous(
breaks = seq(0, 24, by = 4),
labels = sprintf("%02d:00",
seq(0, 24, by = 4))
) +
scale_fill_manual(
values = c("Fatal" = "#000000",
"Serious" = "#D55E00",
"Slight" = "#56B4E9")
) +
coord_cartesian(xlim = c(0, 24)) +
labs(
title = "Traffic Accidents by Day of Week and Time of Day",
subtitle = "Colored by Severity",
x = "Time of Day",
y = "Day of Week",
fill = "Severity",
caption = "Source: Road traffic dataset in Edinburgh,UK \nUK Government produced data 2018"
) +
theme(
plot.title.position = "plot"
)HW 04
1 - A second chance
I received full marks on each problem for Homework 1, so I chose to do problem 1 over again. For this project on road traffic accidents in Edinburgh (2018), I initially visualized the distribution of accidents across time of day, split by weekday/weekend and severity, using a density plot. I chose this plot to explore temporal patterns in accident severity — for example, when serious or fatal accidents are more likely to occur — and how these patterns change between by day of week.
I plan to split the data not by weekday vs weekend, but by each day of the week to see if the trends are faceted more than originally shown in the plot. The colors are not originally great to look at, I planned to use a colorblind palette to help with this, using more contrasting colors. Additionally, I plan to use a ridge-plot (geom_density_ridges) to show the relationships for all the days of the week in order.
2. Arizona state of counties
az_counties <- counties(state = "AZ",
year = 2021,
progress_bar = FALSE) |>
mutate(
name = gsub("\\s+County$", "", NAME),
x = st_coordinates(st_centroid(geometry))[, 1],
y = st_coordinates(st_centroid(geometry))[, 2]
) |> glimpse()Rows: 15
Columns: 21
$ STATEFP <chr> "04", "04", "04", "04", "04", "04", "04", "04"…
$ COUNTYFP <chr> "027", "021", "017", "011", "013", "019", "003…
$ COUNTYNS <chr> "00023901", "00025447", "00042808", "00042807"…
$ GEOID <chr> "04027", "04021", "04017", "04011", "04013", "…
$ NAME <chr> "Yuma", "Pinal", "Navajo", "Greenlee", "Marico…
$ NAMELSAD <chr> "Yuma County", "Pinal County", "Navajo County"…
$ LSAD <chr> "06", "06", "06", "06", "06", "06", "06", "06"…
$ CLASSFP <chr> "H1", "H1", "H1", "H1", "H1", "H1", "H1", "H1"…
$ MTFCC <chr> "G4020", "G4020", "G4020", "G4020", "G4020", "…
$ CSAFP <chr> NA, "429", NA, NA, "429", "536", NA, NA, "429"…
$ CBSAFP <chr> "49740", "38060", "43320", NA, "38060", "46060…
$ METDIVFP <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ FUNCSTAT <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "…
$ ALAND <dbl> 14280774791, 13899612888, 25769070671, 4771128…
$ AWATER <dbl> 13253159, 20747424, 24116169, 13746346, 621734…
$ INTPTLAT <chr> "+32.7739424", "+32.9185207", "+35.3907852", "…
$ INTPTLON <chr> "-113.9109050", "-111.3663394", "-110.3210248"…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-114.4732 3..., M…
$ name <chr> "Yuma", "Pinal", "Navajo", "Greenlee", "Marico…
$ x <dbl> -113.9056, -111.3447, -110.3204, -109.2401, -1…
$ y <dbl> 32.76885, 32.90460, 35.39018, 33.21415, 33.348…
ggplot(az_counties) +
geom_sf(fill = 'grey90',
color = "grey10") +
geom_label_repel(aes(x = x,
y = y,
label = NAME),
size = 3,
min.segment.length = 0.1,
box.padding = 0.5,
segment.color = "grey20") +
coord_sf() +
labs(
title = "Counties in Arizona State",
caption = "Source: Shapefile obtained using {tigris} R package, v2.2.1",
x = "Longitude",
y = "Latitude"
)Resources
Found help with the labeling using the st_coordinates and the geom_label_repel (R Graph Gallery 2024b),(Pebesma, Edzer 2025).
3. Arizona state of population change
az_counties <- counties(
state = "AZ",
year = 2021,
progress_bar = FALSE
)
pop_data <- read_excel("data/co-est2023-pop-04.xlsx",
skip = 5,
n_max = 15,
col_names = c("county", "base_2020", "pop_2020",
"pop_2021", "pop_2022", "pop_2023"))
pop_data <- pop_data |>
mutate(
county = sub("\\.(.+) County, Arizona", "\\1", county),
total_pop_change_20_23 = pop_2023 - pop_2020
) |>
select(-c(base_2020, pop_2020, pop_2021, pop_2022, pop_2023))
pop_data# A tibble: 15 × 2
county total_pop_change_20_23
<chr> <dbl>
1 Apache -877
2 Cochise -887
3 Coconino -720
4 Gila 652
5 Graham 879
6 Greenlee -159
7 La Paz 141
8 Maricopa 140812
9 Mohave 9497
10 Navajo 2412
11 Pima 17987
12 Pinal 54052
13 Santa Cruz 1446
14 Yavapai 11864
15 Yuma 7562
az_data <- az_counties |>
left_join(
pop_data,
by = c("NAME" = "county")
)
rdbu_palette <- rev(brewer.pal(5, "RdBu"))
ggplot(data = az_data) +
geom_sf(aes(fill = total_pop_change_20_23),
color = "white") +
scale_fill_gradientn(colors = rdbu_palette,
name = "Population change",
labels = function(x) format(x, big.mark = ",")) +
coord_sf() +
labs(
title = "Resident Population Change for Counties in AZ",
subtitle = "July 01, 2020 to July 01, 2023",
caption = "Source: Shapefile obtained using {tigris} R package, v2.2.1\npopulation change data from the US Census Bureau",
x = "Longitude",
y = "Latitude"
) +
theme(
plot.title.position = "plot"
)Resources
Found help with the color filling using the scale_fill_gradientn function and then the rdbu palette as well (Wickham et al. 2024), (R Graph Gallery 2024a).
4. Arizona state of Indiginous Tribal Regions
az_counties <- counties(
state = "AZ",
year = 2021,
progress_bar = FALSE
)
tribal_data <- st_read("data/American_Indian_Reservations_in_Arizona.shp") |>
st_transform(crs = st_crs("EPSG:4269")) |>
mutate(
x = st_coordinates(st_centroid(geometry))[, 1],
y = st_coordinates(st_centroid(geometry))[, 2]
) |>
glimpse()Reading layer `American_Indian_Reservations_in_Arizona' from data source `/home/wscott/UA/info526/homework/hw-04-westscotty/data/American_Indian_Reservations_in_Arizona.shp'
using driver `ESRI Shapefile'
Simple feature collection with 28 features and 18 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -114.814 ymin: 31.50811 xmax: -109.0452 ymax: 37.00399
Geodetic CRS: WGS 84
Rows: 28
Columns: 21
$ OBJECTID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1…
$ AIANNHCE <chr> "2680", "4708", "4200", "1720", "3355", "423…
$ AIANNHNS <chr> "02419049", "00027214", "00023763", "0002399…
$ GEOID <chr> "2680R", "4708R", "4200R", "1720R", "3355R",…
$ NAME <chr> "Pascua Yaqui Tribe", "Yavapai-Apache Tribe"…
$ NAMELSAD <chr> "Pascua Pueblo Yaqui", "Yavapai-Apache Natio…
$ LSAD <chr> "86", "86", "86", "96", "86", "86", "86", "8…
$ CLASSFP <chr> "D8", "D2", "D8", "D2", "D2", "D8", "D8", "D…
$ COMPTYP <chr> "R", "R", "R", "R", "R", "R", "R", "R", "R",…
$ AIANNHR <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F",…
$ MTFCC <chr> "G2101", "G2101", "G2101", "G2101", "G2101",…
$ FUNCSTAT <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A",…
$ ALAND <dbl> 3621547, 7437921, 11534915791, 489295372, 75…
$ AWATER <int> 0, 0, 883039, 19920, 62469236, 0, 2784365, 5…
$ INTPTLAT <chr> "+32.1117152", "+34.6288299", "+32.1500358",…
$ INTPTLON <chr> "-111.0821095", "-111.9140889", "-112.070402…
$ Shape__Are <dbl> 38980187, 80045178, 124144205908, 5261984989…
$ Shape__Len <dbl> 39772.457, 99348.734, 2306219.107, 350630.98…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-111.0631 3...,…
$ x <dbl> -111.0821, -111.9141, -112.0449, -112.6797, …
$ y <dbl> 32.11172, 34.62883, 32.15019, 36.91273, 33.3…
ggplot(az_counties) +
geom_sf(
fill = 'grey90',
color = "white"
) +
geom_sf(
data = tribal_data,
linewidth = 1,
fill = NA,
color = "black"
) +
geom_label_repel(
data = tribal_data |>
filter(NAME %in% c("Hopi Tribe",
"Navajo Nation",
"White Mountain Apache Tribe",
"San Carlos Apache Tribe",
"Tohono O’odham Nation")),
aes(x = x,
y = y,
label = NAME),
size = 4,
min.segment.length = 0.1,
box.padding = 0.5,
segment.color = "grey20"
) +
coord_sf() +
labs(
title = "Indigenous Tribal Boundaries in AZ",
caption = "Source: Shapefile obtained using {tigris} R package, v2.2.1\nIndigenous Tribe Shapefile obtained from AZGeo Data",
x = "Longitude",
y = "Latitude"
) +
theme(
plot.title.position = "plot"
)Resources
Found help with reading a shapefile using the st_read function (Holtz 2023). Found help for using st_transform for converting coordinate systems (Heiss 2023).
5. Arizona state of patchwork
az_counties <- counties(state = "AZ",
year = 2021,
progress_bar = FALSE) |>
mutate(
name = gsub("\\s+County$", "", NAME),
x = st_coordinates(st_centroid(geometry))[, 1],
y = st_coordinates(st_centroid(geometry))[, 2]
)
tribal_data <- st_read("data/American_Indian_Reservations_in_Arizona.shp") |>
st_transform(crs = st_crs("EPSG:4269")) |>
mutate(
x = st_coordinates(st_centroid(geometry))[, 1],
y = st_coordinates(st_centroid(geometry))[, 2]
)Reading layer `American_Indian_Reservations_in_Arizona' from data source `/home/wscott/UA/info526/homework/hw-04-westscotty/data/American_Indian_Reservations_in_Arizona.shp'
using driver `ESRI Shapefile'
Simple feature collection with 28 features and 18 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -114.814 ymin: 31.50811 xmax: -109.0452 ymax: 37.00399
Geodetic CRS: WGS 84
pop_data <- read_excel("data/co-est2023-pop-04.xlsx",
skip = 5,
n_max = 15,
col_names = c("county", "base_2020", "pop_2020",
"pop_2021", "pop_2022", "pop_2023"))
pop_data <- pop_data |>
mutate(
county = sub("\\.(.+) County, Arizona", "\\1", county),
total_pop_change_20_23 = pop_2023 - pop_2020
) |>
select(-c(base_2020, pop_2020, pop_2021, pop_2022, pop_2023))
az_data <- az_counties |>
left_join(
pop_data,
by = c("NAME" = "county")
)
rdbu_palette <- rev(brewer.pal(5, "RdBu"))main_plot <- ggplot(data = az_data) +
geom_sf(
aes(fill = total_pop_change_20_23),
color = "white"
) +
scale_fill_gradientn(
colors = rdbu_palette,
name = "Population change",
labels = function(x) format(x, big.mark = ","),
guide = guide_colorbar(barwidth = 9,
barheight = 1,
direction = "horizontal",
title.position = "top")
) +
geom_rect(
aes(xmin = -113.5,
xmax = -110,
ymin = 31.25,
ymax = 34.25),
fill = NA,
color = "black",
linetype = "dashed",
linewidth = 0.5
) +
geom_segment(
data = data.frame(x = c(-113.5, -110),
y = c(34.25, 31.25),
xend = c(-122, -116.75),
yend = c(32.75, 28)),
aes(x = x,
y = y,
xend = xend,
yend = yend),
color = "black",
linetype = "dashed",
linewidth = 0.5
) +
geom_label_repel(
data = filter(az_counties, name %in% c("Maricopa", "Pinal", "Pima")),
aes(x = x,
y = y,
label = NAME),
size = 4,
min.segment.length = 0.1,
box.padding = 0.5,
segment.color = "grey20"
) +
coord_sf(
xlim = c(-122, -109),
ylim = c(28.5, 37)
) +
labs(
title = "Resident Population Change for Counties in AZ",
subtitle = "July 01, 2020 to July 01, 2023",
caption = "Source: Shapefile obtained using {tigris} R package, v2.2.1\npopulation change data from the US Census Bureau\nIndigenous Tribe Shapefile obtained from AZGeo Data",
x = "Longitude",
y = "Latitude"
) +
theme(
legend.position = c(0.0, 0.7),
legend.justification = c(0, 0),
plot.title.position = "plot"
)zoom_plot <- ggplot() +
geom_sf(
data = filter(az_data,
name %in% c("Maricopa",
"Pinal",
"Pima",
"Santa Cruz",
"Gila",
"Yavapai")),
aes(fill = total_pop_change_20_23),
color = "white"
) +
geom_sf(
data = tribal_data,
fill = NA,
color = "black",
linewidth = 0.75
) +
scale_fill_gradientn(
colors = rdbu_palette,
name = NULL,
labels = NULL,
limits = range(az_data$total_pop_change_20_23,
na.rm = TRUE)
) +
geom_label_repel(
data = filter(tribal_data,
NAME %in% c("White Mountain Apache Tribe",
"San Carlos Apache Tribe",
"Tohono O’odham Nation")),
aes(x = x,
y = y,
label = NAME),
size = 3,
box.padding = 0.5,
min.segment.length = 0
) +
coord_sf(
xlim = c(-113.5, -110),
ylim = c(31.25, 34.25)
) +
theme_void() +
theme(
panel.background = element_rect(fill = "grey50"),
legend.position = "none"
)final_map <- main_plot +
inset_element(
zoom_plot,
left = 0.0,
bottom = 0.0,
right = 0.5,
top = 0.5
)
final_mapResources
Found help with the patchwork inset_element (Pedersen 2024). Found help with drawing the line dashed line segments for geom_segment to emphasize the zoom window (Wickham 2024). Found help with drawing the dashed line box around the zoomed portion on the main plot using geom_rect (GeeksforGeeks 2023). I read through a few resources helping my get an idea of how to even approach a zoom window,, this source was quite instructive (Pebesma 2018).